#coding=utf-8
import csv
import lyrics_list
import requests
import pandas as pd
import numpy as np
import time
import re
import urllib2
import urllib
import nltk

def download_page(url, timeout=30):
    """Fetch *url* and return the raw response body.

    A browser-like User-Agent header is sent with the request.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely and a single stalled
            connection would hang the whole scraping run.

    Returns:
        The response body exactly as returned by ``Response.content``.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    header = {'User-Agent': 'Mozilla/5.0(Windows NT 10.0;Win64;x64;rv:54.0)Gecko/20100101 Firefox/54.0'}
    return requests.get(url, headers=header, timeout=timeout).content

def parse_url(html, lyrics_list):
    """Extract lyrics blocks from a page and append them to *lyrics_list*.

    A lyrics block is the text between the marker ``Sorry about that. -->``
    and the next ``</div>``. Every ``<br>`` tag is removed from the captured
    text. Each match is printed together with its 1-based position on the
    page.

    Args:
        html: Page source to search; must be the same string type as the
            regular-expression pattern.
        lyrics_list: List that receives the extracted lyrics. It is modified
            in place. NOTE: inside this function the parameter shadows the
            imported ``lyrics_list`` module.

    Returns:
        The same list object that was passed in, for call-site convenience.
    """
    # The dot is escaped so that only the literal marker text matches; re.S
    # lets the lazy group span multiple lines.
    pattern = re.compile(r'Sorry about that\. -->(.*?)</div>', re.S)
    for i, lyrics in enumerate(pattern.findall(html), 1):
        lyrics = lyrics.replace("<br>", "")
        lyrics_list.append(lyrics)
        print(i)
        print(lyrics)

    return lyrics_list

def main():
    """Download the lyrics for every song link listed in a CSV file.

    Prompts for a base file name, reads the third column of ``<name>.csv``,
    downloads each listed page (skipping the ``songLink`` header cell),
    extracts the lyrics and writes them to ``<name>Lyrics.csv`` in a single
    ``songLyrics`` column. The resulting frame is also printed.

    The output contains every lyrics block that was extracted, in download
    order. A page that yields no match therefore contributes no row, and a
    page with several matches contributes several rows.
    """
    name = raw_input("输入文件名")

    with open(name + '.csv', 'rb') as csvfile:
        # Blank or short rows have no third column; skip them instead of
        # failing with IndexError.
        column = [row[2] for row in csv.reader(csvfile) if len(row) > 2]

    lyricsRawList = []
    for url in column:
        if url == 'songLink':
            # Header cell, not a link.
            continue
        # Pause between requests to avoid hammering the remote site.
        time.sleep(3)
        lyricsRawList = parse_url(download_page(url), lyricsRawList)

    # Build the frame straight from the collected list. This avoids chained
    # assignment (frame[col].loc[i] = value), which pandas does not guarantee
    # to write through, and does not assume one lyrics block per URL.
    sun = pd.DataFrame({'songLyrics': lyricsRawList}, columns=['songLyrics'])
    sun.to_csv(name + 'Lyrics.csv')
    print(sun)



def analy(name=None, directory='E:/@python/Lyrics/'):
    """Load a saved lyrics CSV and return all lyrics joined into one string.

    Args:
        name: Base file name; the file read is ``<directory><name>Lyrics.csv``.
            When omitted, a module-level variable called ``name`` is used,
            which is what the function relied on before this parameter
            existed.
        directory: Folder prefix that is concatenated in front of the file
            name, so it must end with a path separator.

    Returns:
        The contents of the ``songLyrics`` column concatenated without a
        separator.

    Raises:
        NameError: If *name* is omitted and no module-level ``name`` exists.
    """
    if name is None:
        if 'name' not in globals():
            raise NameError("name 'name' is not defined; pass name= to analy()")
        name = globals()['name']

    inf = pd.read_csv(directory + name + 'Lyrics.csv')
    # Empty lyrics are read back from CSV as NaN (a float), which str.join
    # rejects; treat them as empty strings.
    text = ''.join(inf['songLyrics'].fillna(''))
    print(text)
    return text



# Script entry point: run the scraper only when this file is executed directly.
# main() contains no return statement, so `data` is always None here.
if __name__ == '__main__':
    data=main()





